1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package com.sun.java.util.jar.pack;
27
28 import java.io.ByteArrayOutputStream;
29 import java.io.IOException;
30 import java.io.OutputStream;
31 import java.util.ArrayList;
32 import java.util.Collections;
33 import java.util.HashSet;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Random;
37 import java.util.Set;
38 import java.util.zip.Deflater;
39 import java.util.zip.DeflaterOutputStream;
40 import static com.sun.java.util.jar.pack.Constants.*;
41
42
43
44
45
46 class CodingChooser {
47 int verbose;
48 int effort;
49 boolean optUseHistogram = true;
50 boolean optUsePopulationCoding = true;
51 boolean optUseAdaptiveCoding = true;
52 boolean disablePopCoding;
53 boolean disableRunCoding;
54 boolean topLevel = true;
55
56
57
58 double fuzz;
59
60 Coding[] allCodingChoices;
61 Choice[] choices;
62 ByteArrayOutputStream context;
63 CodingChooser popHelper;
64 CodingChooser runHelper;
65
66 Random stress;
67
68
69 static
70 class Choice {
71 final Coding coding;
72 final int index;
73 final int[] distance;
74 Choice(Coding coding, int index, int[] distance) {
75 this.coding = coding;
76 this.index = index;
77 this.distance = distance;
78 }
79
80 int searchOrder;
81 int minDistance;
82 int zipSize;
83 int byteSize;
84 int histSize;
85
86 void reset() {
87 searchOrder = Integer.MAX_VALUE;
88 minDistance = Integer.MAX_VALUE;
89 zipSize = byteSize = histSize = -1;
90 }
91
92 boolean isExtra() {
93 return index < 0;
94 }
95
96 public String toString() {
97 return stringForDebug();
98 }
99
100 private String stringForDebug() {
101 String s = "";
102 if (searchOrder < Integer.MAX_VALUE)
103 s += " so: "+searchOrder;
104 if (minDistance < Integer.MAX_VALUE)
105 s += " md: "+minDistance;
106 if (zipSize > 0)
107 s += " zs: "+zipSize;
108 if (byteSize > 0)
109 s += " bs: "+byteSize;
110 if (histSize > 0)
111 s += " hs: "+histSize;
112 return "Choice["+index+"] "+s+" "+coding;
113 }
114 }
115
116 CodingChooser(int effort, Coding[] allCodingChoices) {
117 PropMap p200 = Utils.currentPropMap();
118 if (p200 != null) {
119 this.verbose
120 = Math.max(p200.getInteger(Utils.DEBUG_VERBOSE),
121 p200.getInteger(Utils.COM_PREFIX+"verbose.coding"));
122 this.optUseHistogram
123 = !p200.getBoolean(Utils.COM_PREFIX+"no.histogram");
124 this.optUsePopulationCoding
125 = !p200.getBoolean(Utils.COM_PREFIX+"no.population.coding");
126 this.optUseAdaptiveCoding
127 = !p200.getBoolean(Utils.COM_PREFIX+"no.adaptive.coding");
128 int lstress
129 = p200.getInteger(Utils.COM_PREFIX+"stress.coding");
130 if (lstress != 0)
131 this.stress = new Random(lstress);
132 }
133
134 this.effort = effort;
135
136
137
138
139 this.allCodingChoices = allCodingChoices;
140
141
142
143
144
145 this.fuzz = 1 + (0.0025 * (effort-MID_EFFORT));
146
147 int nc = 0;
148 for (int i = 0; i < allCodingChoices.length; i++) {
149 if (allCodingChoices[i] == null) continue;
150 nc++;
151 }
152 choices = new Choice[nc];
153 nc = 0;
154 for (int i = 0; i < allCodingChoices.length; i++) {
155 if (allCodingChoices[i] == null) continue;
156 int[] distance = new int[choices.length];
157 choices[nc++] = new Choice(allCodingChoices[i], i, distance);
158 }
159 for (int i = 0; i < choices.length; i++) {
160 Coding ci = choices[i].coding;
161 assert(ci.distanceFrom(ci) == 0);
162 for (int j = 0; j < i; j++) {
163 Coding cj = choices[j].coding;
164 int dij = ci.distanceFrom(cj);
165 assert(dij > 0);
166 assert(dij == cj.distanceFrom(ci));
167 choices[i].distance[j] = dij;
168 choices[j].distance[i] = dij;
169 }
170 }
171 }
172
173 Choice makeExtraChoice(Coding coding) {
174 int[] distance = new int[choices.length];
175 for (int i = 0; i < distance.length; i++) {
176 Coding ci = choices[i].coding;
177 int dij = coding.distanceFrom(ci);
178 assert(dij > 0);
179 assert(dij == ci.distanceFrom(coding));
180 distance[i] = dij;
181 }
182 Choice c = new Choice(coding, -1, distance);
183 c.reset();
184 return c;
185 }
186
187 ByteArrayOutputStream getContext() {
188 if (context == null)
189 context = new ByteArrayOutputStream(1 << 16);
190 return context;
191 }
192
193
194 private int[] values;
195 private int start, end;
196 private int[] deltas;
197 private int min, max;
198 private Histogram vHist;
199 private Histogram dHist;
200 private int searchOrder;
201 private Choice regularChoice;
202 private Choice bestChoice;
203 private CodingMethod bestMethod;
204 private int bestByteSize;
205 private int bestZipSize;
206 private int targetSize;
207
208 private void reset(int[] values, int start, int end) {
209 this.values = values;
210 this.start = start;
211 this.end = end;
212 this.deltas = null;
213 this.min = Integer.MAX_VALUE;
214 this.max = Integer.MIN_VALUE;
215 this.vHist = null;
216 this.dHist = null;
217 this.searchOrder = 0;
218 this.regularChoice = null;
219 this.bestChoice = null;
220 this.bestMethod = null;
221 this.bestZipSize = Integer.MAX_VALUE;
222 this.bestByteSize = Integer.MAX_VALUE;
223 this.targetSize = Integer.MAX_VALUE;
224 }
225
226 public static final int MIN_EFFORT = 1;
227 public static final int MID_EFFORT = 5;
228 public static final int MAX_EFFORT = 9;
229
230 public static final int POP_EFFORT = MID_EFFORT-1;
231 public static final int RUN_EFFORT = MID_EFFORT-2;
232
233 public static final int BYTE_SIZE = 0;
234 public static final int ZIP_SIZE = 1;
235
236 CodingMethod choose(int[] values, int start, int end, Coding regular, int[] sizes) {
237
238 reset(values, start, end);
239
240 if (effort <= MIN_EFFORT || start >= end) {
241 if (sizes != null) {
242 int[] computed = computeSizePrivate(regular);
243 sizes[BYTE_SIZE] = computed[BYTE_SIZE];
244 sizes[ZIP_SIZE] = computed[ZIP_SIZE];
245 }
246 return regular;
247 }
248
249 if (optUseHistogram) {
250 getValueHistogram();
251 getDeltaHistogram();
252 }
253
254 for (int i = start; i < end; i++) {
255 int val = values[i];
256 if (min > val) min = val;
257 if (max < val) max = val;
258 }
259
260
261 int numChoices = markUsableChoices(regular);
262
263 if (stress != null) {
264
265 int rand = stress.nextInt(numChoices*2 + 4);
266 CodingMethod coding = null;
267 for (int i = 0; i < choices.length; i++) {
268 Choice c = choices[i];
269 if (c.searchOrder >= 0 && rand-- == 0) {
270 coding = c.coding;
271 break;
272 }
273 }
274 if (coding == null) {
275 if ((rand & 7) != 0) {
276 coding = regular;
277 } else {
278
279 coding = stressCoding(min, max);
280 }
281 }
282 if (!disablePopCoding
283 && optUsePopulationCoding
284 && effort >= POP_EFFORT) {
285 coding = stressPopCoding(coding);
286 }
287 if (!disableRunCoding
288 && optUseAdaptiveCoding
289 && effort >= RUN_EFFORT) {
290 coding = stressAdaptiveCoding(coding);
291 }
292 return coding;
293 }
294
295 double searchScale = 1.0;
296 for (int x = effort; x < MAX_EFFORT; x++) {
297 searchScale /= 1.414;
298 }
299 int searchOrderLimit = (int)Math.ceil( numChoices * searchScale );
300
301
302 bestChoice = regularChoice;
303 evaluate(regularChoice);
304 int maxd = updateDistances(regularChoice);
305
306
307 int zipSize1 = bestZipSize;
308 int byteSize1 = bestByteSize;
309
310 if (regularChoice.coding == regular && topLevel) {
311
312
313
314
315
316
317 int X = BandStructure.encodeEscapeValue(_meta_canon_max, regular);
318 if (regular.canRepresentSigned(X)) {
319 int Xlen = regular.getLength(X);
320
321
322 regularChoice.zipSize -= Xlen;
323 bestByteSize = regularChoice.byteSize;
324 bestZipSize = regularChoice.zipSize;
325 }
326 }
327
328 int dscale = 1;
329
330 while (searchOrder < searchOrderLimit) {
331 Choice nextChoice;
332 if (dscale > maxd) dscale = 1;
333 int dhi = maxd / dscale;
334 int dlo = maxd / (dscale *= 2) + 1;
335 nextChoice = findChoiceNear(bestChoice, dhi, dlo);
336 if (nextChoice == null) continue;
337 assert(nextChoice.coding.canRepresent(min, max));
338 evaluate(nextChoice);
339 int nextMaxd = updateDistances(nextChoice);
340 if (nextChoice == bestChoice) {
341 maxd = nextMaxd;
342 if (verbose > 5) Utils.log.info("maxd = "+maxd);
343 }
344 }
345
346
347 Coding plainBest = bestChoice.coding;
348 assert(plainBest == bestMethod);
349
350 if (verbose > 2) {
351 Utils.log.info("chooser: plain result="+bestChoice+" after "+bestChoice.searchOrder+" rounds, "+(regularChoice.zipSize-bestZipSize)+" fewer bytes than regular "+regular);
352 }
353 bestChoice = null;
354
355 if (!disablePopCoding
356 && optUsePopulationCoding
357 && effort >= POP_EFFORT
358 && bestMethod instanceof Coding) {
359 tryPopulationCoding(plainBest);
360 }
361
362 if (!disableRunCoding
363 && optUseAdaptiveCoding
364 && effort >= RUN_EFFORT
365 && bestMethod instanceof Coding) {
366 tryAdaptiveCoding(plainBest);
367 }
368
369
370 if (sizes != null) {
371 sizes[BYTE_SIZE] = bestByteSize;
372 sizes[ZIP_SIZE] = bestZipSize;
373 }
374 if (verbose > 1) {
375 Utils.log.info("chooser: result="+bestMethod+" "+
376 (zipSize1-bestZipSize)+
377 " fewer bytes than regular "+regular+
378 "; win="+pct(zipSize1-bestZipSize, zipSize1));
379 }
380 CodingMethod lbestMethod = this.bestMethod;
381 reset(null, 0, 0);
382 return lbestMethod;
383 }
384 CodingMethod choose(int[] values, int start, int end, Coding regular) {
385 return choose(values, start, end, regular, null);
386 }
387 CodingMethod choose(int[] values, Coding regular, int[] sizes) {
388 return choose(values, 0, values.length, regular, sizes);
389 }
390 CodingMethod choose(int[] values, Coding regular) {
391 return choose(values, 0, values.length, regular, null);
392 }
393
394 private int markUsableChoices(Coding regular) {
395 int numChoices = 0;
396 for (int i = 0; i < choices.length; i++) {
397 Choice c = choices[i];
398 c.reset();
399 if (!c.coding.canRepresent(min, max)) {
400
401 c.searchOrder = -1;
402 if (verbose > 1 && c.coding == regular) {
403 Utils.log.info("regular coding cannot represent ["+min+".."+max+"]: "+regular);
404 }
405 continue;
406 }
407 if (c.coding == regular)
408 regularChoice = c;
409 numChoices++;
410 }
411 if (regularChoice == null && regular.canRepresent(min, max)) {
412 regularChoice = makeExtraChoice(regular);
413 if (verbose > 1) {
414 Utils.log.info("*** regular choice is extra: "+regularChoice.coding);
415 }
416 }
417 if (regularChoice == null) {
418 for (int i = 0; i < choices.length; i++) {
419 Choice c = choices[i];
420 if (c.searchOrder != -1) {
421 regularChoice = c;
422 break;
423 }
424 }
425 if (verbose > 1) {
426 Utils.log.info("*** regular choice does not apply "+regular);
427 Utils.log.info(" using instead "+regularChoice.coding);
428 }
429 }
430 if (verbose > 2) {
431 Utils.log.info("chooser: #choices="+numChoices+" ["+min+".."+max+"]");
432 if (verbose > 4) {
433 for (int i = 0; i < choices.length; i++) {
434 Choice c = choices[i];
435 if (c.searchOrder >= 0)
436 Utils.log.info(" "+c);
437 }
438 }
439 }
440 return numChoices;
441 }
442
443
444
445
446 private Choice findChoiceNear(Choice near, int dhi, int dlo) {
447 if (verbose > 5)
448 Utils.log.info("findChoice "+dhi+".."+dlo+" near: "+near);
449 int[] distance = near.distance;
450 Choice found = null;
451 for (int i = 0; i < choices.length; i++) {
452 Choice c = choices[i];
453 if (c.searchOrder < searchOrder)
454 continue;
455
456 if (distance[i] >= dlo && distance[i] <= dhi) {
457
458 if (c.minDistance >= dlo && c.minDistance <= dhi) {
459 if (verbose > 5)
460 Utils.log.info("findChoice => good "+c);
461 return c;
462 }
463 found = c;
464 }
465 }
466 if (verbose > 5)
467 Utils.log.info("findChoice => found "+found);
468 return found;
469 }
470
471 private void evaluate(Choice c) {
472 assert(c.searchOrder == Integer.MAX_VALUE);
473 c.searchOrder = searchOrder++;
474 boolean mustComputeSize;
475 if (c == bestChoice || c.isExtra()) {
476 mustComputeSize = true;
477 } else if (optUseHistogram) {
478 Histogram hist = getHistogram(c.coding.isDelta());
479 c.histSize = (int)Math.ceil(hist.getBitLength(c.coding) / 8);
480 c.byteSize = c.histSize;
481 mustComputeSize = (c.byteSize <= targetSize);
482 } else {
483 mustComputeSize = true;
484 }
485 if (mustComputeSize) {
486 int[] sizes = computeSizePrivate(c.coding);
487 c.byteSize = sizes[BYTE_SIZE];
488 c.zipSize = sizes[ZIP_SIZE];
489 if (noteSizes(c.coding, c.byteSize, c.zipSize))
490 bestChoice = c;
491 }
492 if (c.histSize >= 0) {
493 assert(c.byteSize == c.histSize);
494 }
495 if (verbose > 4) {
496 Utils.log.info("evaluated "+c);
497 }
498 }
499
500 private boolean noteSizes(CodingMethod c, int byteSize, int zipSize) {
501 assert(zipSize > 0 && byteSize > 0);
502 boolean better = (zipSize < bestZipSize);
503 if (verbose > 3)
504 Utils.log.info("computed size "+c+" "+byteSize+"/zs="+zipSize+
505 ((better && bestMethod != null)?
506 (" better by "+
507 pct(bestZipSize - zipSize, zipSize)): ""));
508 if (better) {
509 bestMethod = c;
510 bestZipSize = zipSize;
511 bestByteSize = byteSize;
512 targetSize = (int)(byteSize * fuzz);
513 return true;
514 } else {
515 return false;
516 }
517 }
518
519
520 private int updateDistances(Choice c) {
521
522 int[] distance = c.distance;
523 int maxd = 0;
524 for (int i = 0; i < choices.length; i++) {
525 Choice c2 = choices[i];
526 if (c2.searchOrder < searchOrder)
527 continue;
528 int d = distance[i];
529 if (verbose > 5)
530 Utils.log.info("evaluate dist "+d+" to "+c2);
531 int mind = c2.minDistance;
532 if (mind > d)
533 c2.minDistance = mind = d;
534 if (maxd < d)
535 maxd = d;
536 }
537
538
539 if (verbose > 5)
540 Utils.log.info("evaluate maxd => "+maxd);
541 return maxd;
542 }
543
544
545
546
547 public void computeSize(CodingMethod c, int[] values, int start, int end, int[] sizes) {
548 if (end <= start) {
549 sizes[BYTE_SIZE] = sizes[ZIP_SIZE] = 0;
550 return;
551 }
552 try {
553 resetData();
554 c.writeArrayTo(byteSizer, values, start, end);
555 sizes[BYTE_SIZE] = getByteSize();
556 sizes[ZIP_SIZE] = getZipSize();
557 } catch (IOException ee) {
558 throw new RuntimeException(ee);
559 }
560 }
561 public void computeSize(CodingMethod c, int[] values, int[] sizes) {
562 computeSize(c, values, 0, values.length, sizes);
563 }
564 public int[] computeSize(CodingMethod c, int[] values, int start, int end) {
565 int[] sizes = { 0, 0 };
566 computeSize(c, values, start, end, sizes);
567 return sizes;
568 }
569 public int[] computeSize(CodingMethod c, int[] values) {
570 return computeSize(c, values, 0, values.length);
571 }
572
573 private int[] computeSizePrivate(CodingMethod c) {
574 int[] sizes = { 0, 0 };
575 computeSize(c, values, start, end, sizes);
576 return sizes;
577 }
578 public int computeByteSize(CodingMethod cm, int[] values, int start, int end) {
579 int len = end-start;
580 if (len < 0) {
581 return 0;
582 }
583 if (cm instanceof Coding) {
584 Coding c = (Coding) cm;
585 int size = c.getLength(values, start, end);
586 int size2;
587 assert(size == (size2=countBytesToSizer(cm, values, start, end)))
588 : (cm+" : "+size+" != "+size2);
589 return size;
590 }
591 return countBytesToSizer(cm, values, start, end);
592 }
593 private int countBytesToSizer(CodingMethod cm, int[] values, int start, int end) {
594 try {
595 byteOnlySizer.reset();
596 cm.writeArrayTo(byteOnlySizer, values, start, end);
597 return byteOnlySizer.getSize();
598 } catch (IOException ee) {
599 throw new RuntimeException(ee);
600 }
601 }
602
603 int[] getDeltas(int min, int max) {
604 if ((min|max) != 0)
605 return Coding.makeDeltas(values, start, end, min, max);
606 if (deltas == null) {
607 deltas = Coding.makeDeltas(values, start, end, 0, 0);
608 }
609 return deltas;
610 }
611 Histogram getValueHistogram() {
612 if (vHist == null) {
613 vHist = new Histogram(values, start, end);
614 if (verbose > 3) {
615 vHist.print("vHist", System.out);
616 } else if (verbose > 1) {
617 vHist.print("vHist", null, System.out);
618 }
619 }
620 return vHist;
621 }
622 Histogram getDeltaHistogram() {
623 if (dHist == null) {
624 dHist = new Histogram(getDeltas(0, 0));
625 if (verbose > 3) {
626 dHist.print("dHist", System.out);
627 } else if (verbose > 1) {
628 dHist.print("dHist", null, System.out);
629 }
630 }
631 return dHist;
632 }
633 Histogram getHistogram(boolean isDelta) {
634 return isDelta ? getDeltaHistogram(): getValueHistogram();
635 }
636
637 private void tryPopulationCoding(Coding plainCoding) {
638
639 Histogram hist = getValueHistogram();
640
641 final int approxL = 64;
642 Coding favoredCoding = plainCoding.getValueCoding();
643 Coding tokenCoding = BandStructure.UNSIGNED5.setL(approxL);
644 Coding unfavoredCoding = plainCoding.getValueCoding();
645
646 final int BAND_HEADER = 4;
647
648 int currentFSize;
649 int currentTSize;
650 int currentUSize;
651
652
653
654
655 currentFSize =
656 BAND_HEADER + Math.max(favoredCoding.getLength(min),
657 favoredCoding.getLength(max));
658
659 final int ZERO_LEN = tokenCoding.getLength(0);
660 currentTSize = ZERO_LEN * (end-start);
661
662 currentUSize = (int) Math.ceil(hist.getBitLength(unfavoredCoding) / 8);
663
664 int bestPopSize = (currentFSize + currentTSize + currentUSize);
665 int bestPopFVC = 0;
666
667
668 int[] allFavoredValues = new int[1+hist.getTotalLength()];
669
670
671
672 int targetLowFVC = -1;
673 int targetHighFVC = -1;
674
675
676 int[][] matrix = hist.getMatrix();
677 int mrow = -1;
678 int mcol = 1;
679 int mrowFreq = 0;
680 for (int fvcount = 1; fvcount <= hist.getTotalLength(); fvcount++) {
681
682
683
684 if (mcol == 1) {
685 mrow += 1;
686 mrowFreq = matrix[mrow][0];
687 mcol = matrix[mrow].length;
688 }
689 int thisValue = matrix[mrow][--mcol];
690 allFavoredValues[fvcount] = thisValue;
691 int thisVLen = favoredCoding.getLength(thisValue);
692 currentFSize += thisVLen;
693
694 int thisVCount = mrowFreq;
695 int thisToken = fvcount;
696 currentTSize += (tokenCoding.getLength(thisToken)
697 - ZERO_LEN) * thisVCount;
698
699
700 currentUSize -= thisVLen * thisVCount;
701 int currentSize = (currentFSize + currentTSize + currentUSize);
702
703 if (bestPopSize > currentSize) {
704 if (currentSize <= targetSize) {
705 targetHighFVC = fvcount;
706 if (targetLowFVC < 0)
707 targetLowFVC = fvcount;
708 if (verbose > 4)
709 Utils.log.info("better pop-size at fvc="+fvcount+
710 " by "+pct(bestPopSize-currentSize,
711 bestPopSize));
712 }
713 bestPopSize = currentSize;
714 bestPopFVC = fvcount;
715 }
716 }
717 if (targetLowFVC < 0) {
718 if (verbose > 1) {
719
720 if (verbose > 1)
721 Utils.log.info("no good pop-size; best was "+
722 bestPopSize+" at "+bestPopFVC+
723 " worse by "+
724 pct(bestPopSize-bestByteSize,
725 bestByteSize));
726 }
727 return;
728 }
729 if (verbose > 1)
730 Utils.log.info("initial best pop-size at fvc="+bestPopFVC+
731 " in ["+targetLowFVC+".."+targetHighFVC+"]"+
732 " by "+pct(bestByteSize-bestPopSize,
733 bestByteSize));
734 int oldZipSize = bestZipSize;
735
736
737
738
739
740
741
742
743
744
745 int[] LValuesCoded = PopulationCoding.LValuesCoded;
746 List<Coding> bestFits = new ArrayList<>();
747 List<Coding> fullFits = new ArrayList<>();
748 List<Coding> longFits = new ArrayList<>();
749 final int PACK_TO_MAX_S = 1;
750 if (bestPopFVC <= 255) {
751 bestFits.add(BandStructure.BYTE1);
752 } else {
753 int bestB = Coding.B_MAX;
754 boolean doFullAlso = (effort > POP_EFFORT);
755 if (doFullAlso)
756 fullFits.add(BandStructure.BYTE1.setS(PACK_TO_MAX_S));
757 for (int i = LValuesCoded.length-1; i >= 1; i--) {
758 int L = LValuesCoded[i];
759 Coding c0 = PopulationCoding.fitTokenCoding(targetLowFVC, L);
760 Coding c1 = PopulationCoding.fitTokenCoding(bestPopFVC, L);
761 Coding c3 = PopulationCoding.fitTokenCoding(targetHighFVC, L);
762 if (c1 != null) {
763 if (!bestFits.contains(c1))
764 bestFits.add(c1);
765 if (bestB > c1.B())
766 bestB = c1.B();
767 }
768 if (doFullAlso) {
769 if (c3 == null) c3 = c1;
770 for (int B = c0.B(); B <= c3.B(); B++) {
771 if (B == c1.B()) continue;
772 if (B == 1) continue;
773 Coding c2 = c3.setB(B).setS(PACK_TO_MAX_S);
774 if (!fullFits.contains(c2))
775 fullFits.add(c2);
776 }
777 }
778 }
779
780 for (Iterator<Coding> i = bestFits.iterator(); i.hasNext(); ) {
781 Coding c = i.next();
782 if (c.B() > bestB) {
783 i.remove();
784 longFits.add(0, c);
785 }
786 }
787 }
788 List<Coding> allFits = new ArrayList<>();
789 for (Iterator<Coding> i = bestFits.iterator(),
790 j = fullFits.iterator(),
791 k = longFits.iterator();
792 i.hasNext() || j.hasNext() || k.hasNext(); ) {
793 if (i.hasNext()) allFits.add(i.next());
794 if (j.hasNext()) allFits.add(j.next());
795 if (k.hasNext()) allFits.add(k.next());
796 }
797 bestFits.clear();
798 fullFits.clear();
799 longFits.clear();
800 int maxFits = allFits.size();
801 if (effort == POP_EFFORT)
802 maxFits = 2;
803 else if (maxFits > 4) {
804 maxFits -= 4;
805 maxFits = (maxFits * (effort-POP_EFFORT)
806 ) / (MAX_EFFORT-POP_EFFORT);
807 maxFits += 4;
808 }
809 if (allFits.size() > maxFits) {
810 if (verbose > 4)
811 Utils.log.info("allFits before clip: "+allFits);
812 allFits.subList(maxFits, allFits.size()).clear();
813 }
814 if (verbose > 3)
815 Utils.log.info("allFits: "+allFits);
816 for (Coding tc : allFits) {
817 boolean packToMax = false;
818 if (tc.S() == PACK_TO_MAX_S) {
819
820 packToMax = true;
821 tc = tc.setS(0);
822 }
823 int fVlen;
824 if (!packToMax) {
825 fVlen = bestPopFVC;
826 assert(tc.umax() >= fVlen);
827 assert(tc.B() == 1 || tc.setB(tc.B()-1).umax() < fVlen);
828 } else {
829 fVlen = Math.min(tc.umax(), targetHighFVC);
830 if (fVlen < targetLowFVC)
831 continue;
832 if (fVlen == bestPopFVC)
833 continue;
834 }
835 PopulationCoding pop = new PopulationCoding();
836 pop.setHistogram(hist);
837 pop.setL(tc.L());
838 pop.setFavoredValues(allFavoredValues, fVlen);
839 assert(pop.tokenCoding == tc);
840 pop.resortFavoredValues();
841 int[] tcsizes =
842 computePopSizePrivate(pop,
843 favoredCoding, unfavoredCoding);
844 noteSizes(pop, tcsizes[BYTE_SIZE], BAND_HEADER+tcsizes[ZIP_SIZE]);
845 }
846 if (verbose > 3) {
847 Utils.log.info("measured best pop, size="+bestByteSize+
848 "/zs="+bestZipSize+
849 " better by "+
850 pct(oldZipSize-bestZipSize, oldZipSize));
851 if (bestZipSize < oldZipSize) {
852 Utils.log.info(">>> POP WINS BY "+
853 (oldZipSize - bestZipSize));
854 }
855 }
856 }
857
858 private
859 int[] computePopSizePrivate(PopulationCoding pop,
860 Coding favoredCoding,
861 Coding unfavoredCoding) {
862 if (popHelper == null) {
863 popHelper = new CodingChooser(effort, allCodingChoices);
864 if (stress != null)
865 popHelper.addStressSeed(stress.nextInt());
866 popHelper.topLevel = false;
867 popHelper.verbose -= 1;
868 popHelper.disablePopCoding = true;
869 popHelper.disableRunCoding = this.disableRunCoding;
870 if (effort < MID_EFFORT)
871
872 popHelper.disableRunCoding = true;
873 }
874 int fVlen = pop.fVlen;
875 if (verbose > 2) {
876 Utils.log.info("computePopSizePrivate fvlen="+fVlen+
877 " tc="+pop.tokenCoding);
878 Utils.log.info("{ //BEGIN");
879 }
880
881
882 int[] favoredValues = pop.fValues;
883 int[][] vals = pop.encodeValues(values, start, end);
884 int[] tokens = vals[0];
885 int[] unfavoredValues = vals[1];
886 if (verbose > 2)
887 Utils.log.info("-- refine on fv["+fVlen+"] fc="+favoredCoding);
888 pop.setFavoredCoding(popHelper.choose(favoredValues, 1, 1+fVlen, favoredCoding));
889 if (pop.tokenCoding instanceof Coding &&
890 (stress == null || stress.nextBoolean())) {
891 if (verbose > 2)
892 Utils.log.info("-- refine on tv["+tokens.length+"] tc="+pop.tokenCoding);
893 CodingMethod tc = popHelper.choose(tokens, (Coding) pop.tokenCoding);
894 if (tc != pop.tokenCoding) {
895 if (verbose > 2)
896 Utils.log.info(">>> refined tc="+tc);
897 pop.setTokenCoding(tc);
898 }
899 }
900 if (unfavoredValues.length == 0)
901 pop.setUnfavoredCoding(null);
902 else {
903 if (verbose > 2)
904 Utils.log.info("-- refine on uv["+unfavoredValues.length+"] uc="+pop.unfavoredCoding);
905 pop.setUnfavoredCoding(popHelper.choose(unfavoredValues, unfavoredCoding));
906 }
907 if (verbose > 3) {
908 Utils.log.info("finish computePopSizePrivate fvlen="+fVlen+
909 " fc="+pop.favoredCoding+
910 " tc="+pop.tokenCoding+
911 " uc="+pop.unfavoredCoding);
912
913 StringBuilder sb = new StringBuilder();
914 sb.append("fv = {");
915 for (int i = 1; i <= fVlen; i++) {
916 if ((i % 10) == 0)
917 sb.append('\n');
918 sb.append(" ").append(favoredValues[i]);
919 }
920 sb.append('\n');
921 sb.append("}");
922 Utils.log.info(sb.toString());
923 }
924 if (verbose > 2) {
925 Utils.log.info("} //END");
926 }
927 if (stress != null) {
928 return null;
929 }
930 int[] sizes;
931 try {
932 resetData();
933
934 pop.writeSequencesTo(byteSizer, tokens, unfavoredValues);
935 sizes = new int[] { getByteSize(), getZipSize() };
936 } catch (IOException ee) {
937 throw new RuntimeException(ee);
938 }
939 int[] checkSizes = null;
940 assert((checkSizes = computeSizePrivate(pop)) != null);
941 assert(checkSizes[BYTE_SIZE] == sizes[BYTE_SIZE])
942 : (checkSizes[BYTE_SIZE]+" != "+sizes[BYTE_SIZE]);
943 return sizes;
944 }
945
946 private void tryAdaptiveCoding(Coding plainCoding) {
947 int oldZipSize = bestZipSize;
948
949
950
951
952 int lstart = this.start;
953 int lend = this.end;
954 int[] lvalues = this.values;
955 int len = lend-lstart;
956 if (plainCoding.isDelta()) {
957 lvalues = getDeltas(0,0);
958 lstart = 0;
959 lend = lvalues.length;
960 }
961 int[] sizes = new int[len+1];
962 int fillp = 0;
963 int totalSize = 0;
964 for (int i = lstart; i < lend; i++) {
965 int val = lvalues[i];
966 sizes[fillp++] = totalSize;
967 int size = plainCoding.getLength(val);
968 assert(size < Integer.MAX_VALUE);
969
970 totalSize += size;
971 }
972 sizes[fillp++] = totalSize;
973 assert(fillp == sizes.length);
974 double avgSize = (double)totalSize / len;
975 double sizeFuzz;
976 double sizeFuzz2;
977 double sizeFuzz3;
978 if (effort >= MID_EFFORT) {
979 if (effort > MID_EFFORT+1)
980 sizeFuzz = 1.001;
981 else
982 sizeFuzz = 1.003;
983 } else {
984 if (effort > RUN_EFFORT)
985 sizeFuzz = 1.01;
986 else
987 sizeFuzz = 1.03;
988 }
989
990 sizeFuzz *= sizeFuzz;
991 sizeFuzz2 = (sizeFuzz*sizeFuzz);
992 sizeFuzz3 = (sizeFuzz*sizeFuzz*sizeFuzz);
993
994 double[] dmeshes = new double[1 + (effort-RUN_EFFORT)];
995 double logLen = Math.log(len);
996 for (int i = 0; i < dmeshes.length; i++) {
997 dmeshes[i] = Math.exp(logLen*(i+1)/(dmeshes.length+1));
998 }
999 int[] meshes = new int[dmeshes.length];
1000 int mfillp = 0;
1001 for (int i = 0; i < dmeshes.length; i++) {
1002 int m = (int)Math.round(dmeshes[i]);
1003 m = AdaptiveCoding.getNextK(m-1);
1004 if (m <= 0 || m >= len) continue;
1005 if (mfillp > 0 && m == meshes[mfillp-1]) continue;
1006 meshes[mfillp++] = m;
1007 }
1008 meshes = BandStructure.realloc(meshes, mfillp);
1009
1010 final int BAND_HEADER = 4;
1011
1012 int[] threshes = new int[meshes.length];
1013 double[] fuzzes = new double[meshes.length];
1014 for (int i = 0; i < meshes.length; i++) {
1015 int mesh = meshes[i];
1016 double lfuzz;
1017 if (mesh < 10)
1018 lfuzz = sizeFuzz3;
1019 else if (mesh < 100)
1020 lfuzz = sizeFuzz2;
1021 else
1022 lfuzz = sizeFuzz;
1023 fuzzes[i] = lfuzz;
1024 threshes[i] = BAND_HEADER + (int)Math.ceil(mesh * avgSize * lfuzz);
1025 }
1026 if (verbose > 1) {
1027 System.out.print("tryAdaptiveCoding ["+len+"]"+
1028 " avgS="+avgSize+" fuzz="+sizeFuzz+
1029 " meshes: {");
1030 for (int i = 0; i < meshes.length; i++) {
1031 System.out.print(" " + meshes[i] + "(" + threshes[i] + ")");
1032 }
1033 Utils.log.info(" }");
1034 }
1035 if (runHelper == null) {
1036 runHelper = new CodingChooser(effort, allCodingChoices);
1037 if (stress != null)
1038 runHelper.addStressSeed(stress.nextInt());
1039 runHelper.topLevel = false;
1040 runHelper.verbose -= 1;
1041 runHelper.disableRunCoding = true;
1042 runHelper.disablePopCoding = this.disablePopCoding;
1043 if (effort < MID_EFFORT)
1044
1045 runHelper.disablePopCoding = true;
1046 }
1047 for (int i = 0; i < len; i++) {
1048 i = AdaptiveCoding.getNextK(i-1);
1049 if (i > len) i = len;
1050 for (int j = meshes.length-1; j >= 0; j--) {
1051 int mesh = meshes[j];
1052 int thresh = threshes[j];
1053 if (i+mesh > len) continue;
1054 int size = sizes[i+mesh] - sizes[i];
1055 if (size >= thresh) {
1056
1057 int bend = i+mesh;
1058 int bsize = size;
1059 double bigSize = avgSize * fuzzes[j];
1060 while (bend < len && (bend-i) <= len/2) {
1061 int bend0 = bend;
1062 int bsize0 = bsize;
1063 bend += mesh;
1064 bend = i+AdaptiveCoding.getNextK(bend-i-1);
1065 if (bend < 0 || bend > len)
1066 bend = len;
1067 bsize = sizes[bend]-sizes[i];
1068 if (bsize < BAND_HEADER + (bend-i) * bigSize) {
1069 bsize = bsize0;
1070 bend = bend0;
1071 break;
1072 }
1073 }
1074 int nexti = bend;
1075 if (verbose > 2) {
1076 Utils.log.info("bulge at "+i+"["+(bend-i)+"] of "+
1077 pct(bsize - avgSize*(bend-i),
1078 avgSize*(bend-i)));
1079 Utils.log.info("{ //BEGIN");
1080 }
1081 CodingMethod begcm, midcm, endcm;
1082 midcm = runHelper.choose(this.values,
1083 this.start+i,
1084 this.start+bend,
1085 plainCoding);
1086 if (midcm == plainCoding) {
1087
1088 begcm = plainCoding;
1089 endcm = plainCoding;
1090 } else {
1091 begcm = runHelper.choose(this.values,
1092 this.start,
1093 this.start+i,
1094 plainCoding);
1095 endcm = runHelper.choose(this.values,
1096 this.start+bend,
1097 this.start+len,
1098 plainCoding);
1099 }
1100 if (verbose > 2)
1101 Utils.log.info("} //END");
1102 if (begcm == midcm && i > 0 &&
1103 AdaptiveCoding.isCodableLength(bend)) {
1104 i = 0;
1105 }
1106 if (midcm == endcm && bend < len) {
1107 bend = len;
1108 }
1109 if (begcm != plainCoding ||
1110 midcm != plainCoding ||
1111 endcm != plainCoding) {
1112 CodingMethod chain;
1113 int hlen = 0;
1114 if (bend == len) {
1115 chain = midcm;
1116 } else {
1117 chain = new AdaptiveCoding(bend-i, midcm, endcm);
1118 hlen += BAND_HEADER;
1119 }
1120 if (i > 0) {
1121 chain = new AdaptiveCoding(i, begcm, chain);
1122 hlen += BAND_HEADER;
1123 }
1124 int[] chainSize = computeSizePrivate(chain);
1125 noteSizes(chain,
1126 chainSize[BYTE_SIZE],
1127 chainSize[ZIP_SIZE]+hlen);
1128 }
1129 i = nexti;
1130 break;
1131 }
1132 }
1133 }
1134 if (verbose > 3) {
1135 if (bestZipSize < oldZipSize) {
1136 Utils.log.info(">>> RUN WINS BY "+
1137 (oldZipSize - bestZipSize));
1138 }
1139 }
1140 }
1141
1142 static private
1143 String pct(double num, double den) {
1144 return (Math.round((num / den)*10000)/100.0)+"%";
1145 }
1146
1147 static
1148 class Sizer extends OutputStream {
1149 final OutputStream out;
1150 Sizer(OutputStream out) {
1151 this.out = out;
1152 }
1153 Sizer() {
1154 this(null);
1155 }
1156 private int count;
1157 public void write(int b) throws IOException {
1158 count++;
1159 if (out != null) out.write(b);
1160 }
1161 public void write(byte b[], int off, int len) throws IOException {
1162 count += len;
1163 if (out != null) out.write(b, off, len);
1164 }
1165 public void reset() {
1166 count = 0;
1167 }
1168 public int getSize() { return count; }
1169
1170 public String toString() {
1171 String str = super.toString();
1172
1173 assert((str = stringForDebug()) != null);
1174 return str;
1175 }
1176 String stringForDebug() {
1177 return "<Sizer "+getSize()+">";
1178 }
1179 }
1180
1181 private Sizer zipSizer = new Sizer();
1182 private Deflater zipDef = new Deflater();
1183 private DeflaterOutputStream zipOut = new DeflaterOutputStream(zipSizer, zipDef);
1184 private Sizer byteSizer = new Sizer(zipOut);
1185 private Sizer byteOnlySizer = new Sizer();
1186
1187 private void resetData() {
1188 flushData();
1189 zipDef.reset();
1190 if (context != null) {
1191
1192 try {
1193 context.writeTo(byteSizer);
1194 } catch (IOException ee) {
1195 throw new RuntimeException(ee);
1196 }
1197 }
1198 zipSizer.reset();
1199 byteSizer.reset();
1200 }
1201 private void flushData() {
1202 try {
1203 zipOut.finish();
1204 } catch (IOException ee) {
1205 throw new RuntimeException(ee);
1206 }
1207 }
1208 private int getByteSize() {
1209 return byteSizer.getSize();
1210 }
1211 private int getZipSize() {
1212 flushData();
1213 return zipSizer.getSize();
1214 }
1215
1216
1217
1218
1219 void addStressSeed(int x) {
1220 if (stress == null) return;
1221 stress.setSeed(x + ((long)stress.nextInt() << 32));
1222 }
1223
1224
1225 private CodingMethod stressPopCoding(CodingMethod coding) {
1226 assert(stress != null);
1227
1228 if (!(coding instanceof Coding)) return coding;
1229 Coding valueCoding = ((Coding)coding).getValueCoding();
1230 Histogram hist = getValueHistogram();
1231 int fVlen = stressLen(hist.getTotalLength());
1232 if (fVlen == 0) return coding;
1233 List<Integer> popvals = new ArrayList<>();
1234 if (stress.nextBoolean()) {
1235
1236 Set<Integer> popset = new HashSet<>();
1237 for (int i = start; i < end; i++) {
1238 if (popset.add(values[i])) popvals.add(values[i]);
1239 }
1240 } else {
1241 int[][] matrix = hist.getMatrix();
1242 for (int mrow = 0; mrow < matrix.length; mrow++) {
1243 int[] row = matrix[mrow];
1244 for (int mcol = 1; mcol < row.length; mcol++) {
1245 popvals.add(row[mcol]);
1246 }
1247 }
1248 }
1249 int reorder = stress.nextInt();
1250 if ((reorder & 7) <= 2) {
1251
1252 Collections.shuffle(popvals, stress);
1253 } else {
1254
1255 if (((reorder >>>= 3) & 7) <= 2) Collections.sort(popvals);
1256 if (((reorder >>>= 3) & 7) <= 2) Collections.reverse(popvals);
1257 if (((reorder >>>= 3) & 7) <= 2) Collections.rotate(popvals, stressLen(popvals.size()));
1258 }
1259 if (popvals.size() > fVlen) {
1260
1261 if (((reorder >>>= 3) & 7) <= 2) {
1262
1263 popvals.subList(fVlen, popvals.size()).clear();
1264 } else {
1265
1266 popvals.subList(0, popvals.size()-fVlen).clear();
1267 }
1268 }
1269 fVlen = popvals.size();
1270 int[] fvals = new int[1+fVlen];
1271 for (int i = 0; i < fVlen; i++) {
1272 fvals[1+i] = (popvals.get(i)).intValue();
1273 }
1274 PopulationCoding pop = new PopulationCoding();
1275 pop.setFavoredValues(fvals, fVlen);
1276 int[] lvals = PopulationCoding.LValuesCoded;
1277 for (int i = 0; i < lvals.length / 2; i++) {
1278 int popl = lvals[stress.nextInt(lvals.length)];
1279 if (popl < 0) continue;
1280 if (PopulationCoding.fitTokenCoding(fVlen, popl) != null) {
1281 pop.setL(popl);
1282 break;
1283 }
1284 }
1285 if (pop.tokenCoding == null) {
1286 int lmin = fvals[1], lmax = lmin;
1287 for (int i = 2; i <= fVlen; i++) {
1288 int val = fvals[i];
1289 if (lmin > val) lmin = val;
1290 if (lmax < val) lmax = val;
1291 }
1292 pop.tokenCoding = stressCoding(lmin, lmax);
1293 }
1294
1295 computePopSizePrivate(pop, valueCoding, valueCoding);
1296 return pop;
1297 }
1298
1299
1300 private CodingMethod stressAdaptiveCoding(CodingMethod coding) {
1301 assert(stress != null);
1302
1303 if (!(coding instanceof Coding)) return coding;
1304 Coding plainCoding = (Coding)coding;
1305 int len = end-start;
1306 if (len < 2) return coding;
1307
1308 int spanlen = stressLen(len-1)+1;
1309 if (spanlen == len) return coding;
1310 try {
1311 assert(!disableRunCoding);
1312 disableRunCoding = true;
1313 int[] allValues = values.clone();
1314 CodingMethod result = null;
1315 int scan = this.end;
1316 int lstart = this.start;
1317 for (int split; scan > lstart; scan = split) {
1318 int thisspan;
1319 int rand = (scan - lstart < 100)? -1: stress.nextInt();
1320 if ((rand & 7) != 0) {
1321 thisspan = (spanlen==1? spanlen: stressLen(spanlen-1)+1);
1322 } else {
1323
1324 int KX = (rand >>>= 3) & AdaptiveCoding.KX_MAX;
1325 int KB = (rand >>>= 3) & AdaptiveCoding.KB_MAX;
1326 for (;;) {
1327 thisspan = AdaptiveCoding.decodeK(KX, KB);
1328 if (thisspan <= scan - lstart) break;
1329
1330 if (KB != AdaptiveCoding.KB_DEFAULT)
1331 KB = AdaptiveCoding.KB_DEFAULT;
1332 else
1333 KX -= 1;
1334 }
1335
1336 assert(AdaptiveCoding.isCodableLength(thisspan));
1337 }
1338 if (thisspan > scan - lstart) thisspan = scan - lstart;
1339 while (!AdaptiveCoding.isCodableLength(thisspan)) {
1340 --thisspan;
1341 }
1342 split = scan - thisspan;
1343 assert(split < scan);
1344 assert(split >= lstart);
1345
1346 CodingMethod sc = choose(allValues, split, scan, plainCoding);
1347 if (result == null) {
1348 result = sc;
1349 } else {
1350 result = new AdaptiveCoding(scan-split, sc, result);
1351 }
1352 }
1353 return result;
1354 } finally {
1355 disableRunCoding = false;
1356 }
1357 }
1358
1359
1360 private Coding stressCoding(int min, int max) {
1361 assert(stress != null);
1362 for (int i = 0; i < 100; i++) {
1363 Coding c = Coding.of(stress.nextInt(Coding.B_MAX)+1,
1364 stress.nextInt(Coding.H_MAX)+1,
1365 stress.nextInt(Coding.S_MAX+1));
1366 if (c.B() == 1) c = c.setH(256);
1367 if (c.H() == 256 && c.B() >= 5) c = c.setB(4);
1368 if (stress.nextBoolean()) {
1369 Coding dc = c.setD(1);
1370 if (dc.canRepresent(min, max)) return dc;
1371 }
1372 if (c.canRepresent(min, max)) return c;
1373 }
1374 return BandStructure.UNSIGNED5;
1375 }
1376
1377
1378 private int stressLen(int len) {
1379 assert(stress != null);
1380 assert(len >= 0);
1381 int rand = stress.nextInt(100);
1382 if (rand < 20)
1383 return Math.min(len/5, rand);
1384 else if (rand < 40)
1385 return len;
1386 else
1387 return stress.nextInt(len);
1388 }
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489 }